# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from datetime import datetime
from zc_core.model.items import Box
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from zc_core.spiders.base import BaseSpider
from changsha.rules import parse_catalog, parse_supplier, parse_supplier_page


class CatalogSpider(BaseSpider):
    """Spider that fetches the shop index page and the paginated supplier list.

    Request flow, as wired by the callbacks below:

    1. ``start_requests`` POSTs ``index_url`` -> ``parse_index``
    2. ``parse_index`` emits the catalog ``Box`` (if any) and POSTs page 1 of
       the supplier list -> ``parse_supplier_page``
    3. ``parse_supplier_page`` reads the total page count and POSTs every
       page -> ``parse_supplier``
    4. ``parse_supplier`` emits one supplier ``Box`` per non-empty page

    NOTE(review): ``self.batch_no`` and ``self.error_back`` are not defined in
    this class; presumably they are provided by ``BaseSpider`` -- confirm.
    """

    name = 'catalog'
    # Frequently used URLs
    index_url = 'http://hunan.gpmart.cn/shopHome/gotoShopPage.action'
    supplier_list_url = 'http://hunan.gpmart.cn/frontSearchProduct/shopInfoSearch.action?pager.Keyword=&&pageIndex={}&&isOrderBySalesNumber=0'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Forward ``batchNo`` and any extra spider arguments to the base class."""
        super(CatalogSpider, self).__init__(batchNo=batchNo, *args, **kwargs)

    def start_requests(self):
        """Yield the initial POST request for the index (catalog) page."""
        # Catalog
        yield Request(
            method='POST',
            url=self.index_url,
            callback=self.parse_index,
            errback=self.error_back,
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no,
            },
            priority=200,
            dont_filter=True
        )

    def parse_index(self, response):
        """Emit the parsed catalog list, then request supplier list page 1.

        The supplier request is yielded whether or not any catalog entries
        were parsed from ``response``.
        """
        # Handle the catalog list
        cats = parse_catalog(response)
        if cats:
            self.logger.info('分类: count[%s]', len(cats))
            yield Box('catalog', self.batch_no, cats)

        # Suppliers: page 1 is fetched first only to discover the page count
        yield Request(
            method='POST',
            url=self.supplier_list_url.format(1),
            callback=self.parse_supplier_page,
            errback=self.error_back,
            meta={
                'batchNo': self.batch_no,
                'page': 1
            },
            priority=100,
            dont_filter=True
        )

    # Handle supplier list pagination
    def parse_supplier_page(self, response):
        """Read the total page count and request every supplier list page.

        The bare name ``parse_supplier_page`` called below is the module-level
        rule function imported from ``changsha.rules``, not this method:
        class attributes are not part of a method's name-lookup scope.

        Page 1 is requested again here (``dont_filter=True``) so that it is
        handled by ``parse_supplier`` like every other page.
        """
        total = parse_supplier_page(response)
        if not total:
            return

        self.logger.info('供应商总页数: total=%s', total)
        for page in range(1, total + 1):
            yield Request(
                method='POST',
                url=self.supplier_list_url.format(page),
                callback=self.parse_supplier,
                errback=self.error_back,
                meta={
                    'reqType': 'supplier',
                    'batchNo': self.batch_no,
                    'page': page
                },
                priority=100,
                dont_filter=True
            )

    # Handle one page of the supplier list
    def parse_supplier(self, response):
        """Emit the suppliers parsed from one list page, or log an empty page.

        The bare name ``parse_supplier`` called below is the module-level rule
        function imported from ``changsha.rules``, not this method.
        """
        cur_page = response.meta.get('page')
        suppliers = parse_supplier(response)
        if suppliers:
            self.logger.info('供应商: page=%s, count=%s', cur_page, len(suppliers))
            yield Box('supplier', self.batch_no, suppliers)
        else:
            # ``suppliers`` is falsy here and may be None rather than an empty
            # collection; calling len(None) would raise TypeError inside the
            # callback, so report a count of 0 in that case.
            count = len(suppliers) if suppliers is not None else 0
            self.logger.info('供应商分页为空: page=%s, count=%s', cur_page, count)
